import requests  
from bs4 import BeautifulSoup  

from datetime import datetime
import pymysql

def getUrl():
    # Step 1: 抓取数据
    url = 'https://www.xuexila.com/yu/duilian/c2293488.html'
    url = 'https://www.xuexila.com/yu/duilian/c2293494.html'
    url = 'https://www.xuexila.com/yu/duilian/c2226491.html'
    url = 'https://www.xuexila.com/yu/duilian/c2226064.html'
    url = 'https://www.xuexila.com/yu/duilian/c2305335.html'
    url = 'https://www.xuexila.com/yu/duilian/c2297507.html'
    response = requests.get(url)
    # 打印响应头看看编码
    print(response.headers)

    # 根据响应头设置编码
    response.encoding = response.apparent_encoding  # 自动检测编码（通常是utf-8或gbk）

    soup = BeautifulSoup(response.text, 'html.parser')

    # Step 2: 解析数据
    couplets = []
    for p in soup.find_all('p'):
        text = p.text.strip()
        #print(text)
        if "上联：" in text and "下联：" in text and "横批：" in text:
            # 提取上联，下联和横批
            upper = text.split("上联：")[1].split("下联：")[0].strip()
            lower = text.split("下联：")[1].split("横批：")[0].strip()


            subtitle = text.split("横批：")[1].strip()
            # 替换掉一些标点符号
            upper = upper.replace(";", "").replace("。", "").replace("，", "")
            lower = lower.replace(";", "").replace("。", "").replace("，", "")
            subtitle = subtitle.replace(";", "").replace("。", "").replace("，", "")

            print(upper, lower, subtitle)
            # 计算字数
            num = len(upper)
            couplets.append((upper, lower, subtitle, num))
    return couplets

# Step 3: 存储到 MySQL  
# 数据库配置  
db_config = {  
    'host': '127.0.0.1',
    'user': 'root',
    'password': '123',
    'database': 'mymusic'
}

# 连接到 MySQL
connection = pymysql.connect(**db_config)
cursor = connection.cursor()

# 创建表（如果不存在）
cursor.execute('''  
    CREATE TABLE IF NOT EXISTS dati_couplet (  
        id BIGINT NOT NULL AUTO_INCREMENT COMMENT '主键',  
        cp_upper VARCHAR(100) COMMENT '上联',  
        cp_lower VARCHAR(100) COMMENT '下联',  
        cp_top_title VARCHAR(100) COMMENT '横批',  
        cp_remark VARCHAR(255) COMMENT '备注',  
        create_time DATETIME COMMENT '创建时间',  
        sort BIGINT DEFAULT 99 COMMENT '排序',  
        num INT COMMENT '对联字数',  
        tags VARCHAR(255) COMMENT '标签分类',  
        exp VARCHAR(255) COMMENT '扩展字段',  
        PRIMARY KEY (id)  
    ) CHARSET=utf8mb4 COLLATE=utf8mb4_bin COMMENT='对联表'  
''')
def insertDb(couplets):
    # 插入数据
    for upper, lower, subtitle, num in couplets:
        cursor.execute('''  
            INSERT INTO dati_couplet (cp_upper, cp_lower, cp_top_title, create_time, num)  
            VALUES (%s, %s, %s, %s, %s)  
        ''', (upper, lower, subtitle, datetime.now(), num))

    connection.commit()

    # 关闭连接
    cursor.close()
    connection.close()

    print("数据存储成功！")

if __name__ == '__main__':
    couplets = getUrl()
    insertDb(couplets)